from pyspark.sql import SparkSession

# https://www.kaggle.com/datasets/timmofeyy/all-the-airport-across-the-world-dataset
if __name__ == '__main__':
    # Demo: build a tiny in-memory DataFrame, group it, and print the
    # number of rows in each group.
    spark = SparkSession.builder.appName("group_learn").master("local[*]").getOrCreate()
    try:
        data = [("Tom", 28), ("John", 21), ("Mike", 21), ("Sara", 25)]
        columns = ["name", "age"]
        df = spark.createDataFrame(data, columns)

        # Grouping on both columns: every (name, age) pair in the sample
        # data is distinct, so each group has a count of 1.
        grouped_counts = df.groupby(['name', 'age']).count()
        grouped_counts.show()
    finally:
        # Always stop the session, even if building or showing the
        # DataFrame raises, so the local Spark session is not left running.
        spark.stop()
